

/*------------------------------------------------------------------------------
	1. Housekeeping
------------------------------------------------------------------------------*/

	*Run housekeeping code
		qui do ".../replication_package/housekeeping.do"

	*Write log
	*(first close any log left open by an earlier, interrupted run; otherwise
	* -log using- stops with a "log file already open" error)
		capture log close
		log using "$log_loc/figure_a2.log", text replace

	*Create an empty tempfile that accumulates the yearly results
	*(clear first: -save, emptyok- writes whatever is in memory, so data left
	* behind by the housekeeping code would otherwise be appended to the
	* figure data later on)
		clear
		tempfile figure_a2
		save `figure_a2', emptyok

/*------------------------------------------------------------------------------
	2. Determine Relevant ids
------------------------------------------------------------------------------*/

		*Load the base-year (2019) children file
		local year = 2019
		use "$data/childrens`year'.dta", clear
			
			/*Sample restrictions*/
				*Keep only observations whose insurance state matches the state list
					rename state_insurance state
					fmerge m:1 state using "$data/statelist.dta"
						assert _merge != 2 //Every state in the state list must appear in the data
						keep if _merge == 3

				*Age restriction: 0 through 16 (missing ages are dropped)
					keep if inrange(age_num, 0, 16)

				*Alive at year end: no death recorded (0) or death after Dec 31 of `year'
				*NOTE(review): the comparison presumes death_date is a numeric YYYYMMDD value - confirm
					keep if death_date == 0 | death_date > `year'*10000 + 1231

				*Drop impossible months-covered values (must be above 0 and at most 12)
					drop if num_cov_ins <= 0 | num_cov_ins > 12

				*Unclaimed children only: file_inc equal to system missing
					drop if file_inc != .

				*Reduce to the list of distinct ids
					keep id
					duplicates drop
	
			*Store the id list for the merges in the next section
			tempfile unclaimed_kids_2019
			save `unclaimed_kids_2019', replace


/*------------------------------------------------------------------------------
	3. Clean Main Datasets
------------------------------------------------------------------------------*/

	*Multiplicative adjustment applied to the claimed count ("claiming limits").
	*NOTE(review): the source of this constant is not documented in this file - confirm
	local claim_adj 1.0017029

	*For each surrounding year, compute the share of the 2019-unclaimed children
	*that appear on a return, and stack one row per year into `figure_a2'
	foreach year in 2017 2018 2020 2021 {

		*Import Data
		use "$data/childrens`year'.dta", clear

			/*Merge in unclaimed kids (keep only ids that were unclaimed in 2019)*/
				merge 1:1 id using `unclaimed_kids_2019'
					keep if _merge == 3
					drop _merge

			/*Restrict Sample*/
				*Keep observations in the 51 states (per the f1095 variable). Note that most of the dropped state values are missing.
					rename state_insurance state
					fmerge m:1 state using "$data/statelist.dta"
						assert _merge != 2 //Confirm that all 51 states are in dataset
						drop if _merge != 3

				*Keep if age_num is within the range
					keep if age_num >= 0 & age_num <= 16

				*Keep if kid died after end of year or is currently alive
					keep if `year'*10000+1231<death_date|death_date==0

				*Keep if months covered is possible
					keep if num_cov_ins > 0 & num_cov_ins <= 12

			/*Generate variables for figures*/	
				*Generate outcome variable on claiming (1 if file_inc is not system missing)
					gen onreturn = file_inc != .

				*Generate a count variable
					gen ct = 1

				*Collapse to a single observation holding the year's totals
				*(no by() variables: ct = number of children, onreturn = number claimed)
					collapse (sum) ct onreturn

				*Tag the totals with the year they belong to
					gen year = `year'

				*Adjust for claiming limits
					replace onreturn = onreturn * `claim_adj'

				*Generate outcome (i.e., percent of children claimed)
					gen perc_return = onreturn/ct
			
				*Adjust outcome to be out of 100
					replace perc_return = perc_return*100

			/*Add the rows accumulated so far and store the result*/
				*The tempfile declared in the housekeeping section is reused on every
				*pass; re-declaring it here would only allocate an extra temp file
				*per iteration.
				append using `figure_a2'
				save `figure_a2', replace	

	}

	use `figure_a2', clear

/*------------------------------------------------------------------------------
	4. Create Figure A2
------------------------------------------------------------------------------*/
			
		*Add a placeholder observation for 2019, the year in which the sample is
		*unclaimed by construction, so the plotted line passes through 0.
		*The row is a copy of the 2018 row: only year and perc_return are
		*overwritten, ct and onreturn still hold the 2018 values.
		expand 2 if year == 2018, generate(added_2019)
		replace year = 2019 if added_2019
		replace perc_return = 0 if added_2019
		drop added_2019
		sort year

			/*Create Figures*/	
				*Create Graph (sort: connect the points in year order regardless of row order)
				twoway (connected perc_return year, sort msymbol(o) mlcolor(gs1) mfcolor(white) clcolor(gs1) lpattern(dash)) , ///
					ylabel(, format(%5.1f) labsize(small)) ///
					xtitle("Year", height(4)) ///
					xlabel(2017(1)2021, labsize(small)) ///
					ytitle("Share Claimed (%)", size(medsmall)) ///
					graphregion(fcolor(white) color(white) lcolor(white))


				*Export Graph
					graph export "${output}/unclaimed_surrounding_years.png", replace as(png)

		*Close the log opened in the housekeeping section
		log close
